Report last updated Tue May 31 14:56:22 2016.
library(ggplot2)
library(reshape)
library(limma)
library(CHBUtils)
library(biomaRt)
library(dplyr)
library(cluster)
library(gridExtra)
library(logging)
library(org.Mm.eg.db)
library(DEGreport)
library(isomiRs)
library(SummarizedExperiment)
library(dplyr)
order_group=c("normal", "day3", "day7", "day14")
# root_path = "~/orch/scratch/vishal_mirna_kidney/publish/UUO-model"
result_files = file.path(root_path, "omics/files_publish")
dir.create(result_files, showWarnings = F, recursive = T)
basicConfig(level='INFO')FC > 4 and FDR < 1%
protein_file = file.path(root_path, "protein", "files_publish")
dir.create(protein_file, recursive = T, showWarnings = F)
counts = read.csv(file.path(root_path, "protein", "UUO_Proteomics_RawData.csv"),skip = 1) %>% tidyr::separate(Protein.Id, c("sp", "Uniprot.ID", "type"),sep = "[::|::]", extra = "merge")
row.names(counts) = counts$Uniprot.ID
mapping = data.frame(id=rownames(counts))
mapping = annotate_df(mapping, "id", 'mmusculus_gene_ensembl', "uniprot_genename", "ensembl_gene_id") %>% distinct(ensembl_gene_id)
mapping2 = data.frame(id=unique(counts$Gene.Symbol)[!is.na(unique(counts$Gene.Symbol))])
mapping2 = annotate_df(mapping2, "id", 'mmusculus_gene_ensembl', "wikigene_name", "ensembl_gene_id") %>% distinct(ensembl_gene_id)
counts[as.character(mapping$id), "ensembl"] = mapping$ensembl_gene_id
idx = match(as.character(mapping2$id),as.character(counts$Gene.Symbol))
counts[idx, "ensembl"] = mapping2$ensembl_gene_id
counts$symbol = convertIDs(as.character(counts$ensembl), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
save_file(counts, "counts_annotated.csv", protein_file)
counts = counts %>% distinct(ensembl) %>% filter(!is.na(ensembl))
clean_counts = counts[,9:18]
rownames(clean_counts) = counts$ensembl
names(clean_counts) = sapply(names(clean_counts), tolower)
cat("## Expression density of 'raw' data\n\n")ggplot(melt(clean_counts), aes(x=log2(value), colour=variable)) + geom_density()dge = edgeR::DGEList(clean_counts)
dge = edgeR::calcNormFactors(dge, method="TMM")
coldata = data.frame(row.names=colnames(clean_counts), samples=colnames(clean_counts)) %>% tidyr::separate(samples,into = c("group"), extra = "drop")
coldata$group = sapply(coldata$group, tolower)
norm_counts = edgeR::cpm(dge, log=T)
MA <- normalizeBetweenArrays(as.matrix(norm_counts), method="none")
cat("## Expression density of normalize data\n\n")ggplot(melt(as.data.frame(MA)), aes(x=value, colour=variable)) + geom_density()cat("## MDS plot\n\n")mds(MA, condition = coldata$group)save_file(MA, "uuo_model_log2_counts.tsv", protein_file)
design <- model.matrix(~ 0 + coldata$group)
colnames(design) = c("day14", "day3", "day7", "normal")
fit <- lmFit(MA, design)
contrast.matrix <- makeContrasts(day3-normal, day7-day3, day14-day7, day14-normal, day14-day3, day7-normal, levels=design)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
res = topTable(fit2, adjust="BH", n="Inf")
save_file(res, "uuo_model.tsv", protein_file)
absMaxFC = rowMax(as.matrix(abs(res[,1:6])))
sign = row.names(res[absMaxFC>4 & res$adj.P.Val<0.01,])
cat("## MDS plot of DE proteins\n\n")mds(MA[sign,], condition = coldata$group)coldata$group = factor(coldata$group, levels=order_group)
cat("## Clustering analysis\n\n")clusters = degPatterns(MA[sign,], coldata, minc = 15, summarize = "group", time="group", col = NULL)Working with 92 genes
.df=clusters$df
save_file(.df, "clusters_genes_cluster.tsv", protein_file)
name_res = compress_results(protein_file, prefix = "uuo_model_prot_ournorm", zip = TRUE)mrna_path = "rnaseq/final/2016-02-03_mrna/files"
mrna_matrix = read.csv(file.path(root_path, mrna_path, "uuo_model_log2_counts.tsv"), row.names = 1)
mrna_clusters = read.csv(file.path(root_path, mrna_path, "clusters_genes.tsv"), row.names = 1)
mrna_col = data.frame(row.names=colnames(mrna_matrix), samples=colnames(mrna_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(mrna_col) = colnames(mrna_matrix)
prot_path = "protein/files_publish"
prot_matrix = read.csv(file.path(root_path, prot_path, "uuo_model_log2_counts.tsv"), row.names = 1)
prot_clusters = read.csv(file.path(root_path, prot_path, "clusters_genes_cluster.tsv"), row.names = 1)
prot_col = data.frame(row.names=colnames(prot_matrix), samples=colnames(prot_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(prot_col) = colnames(prot_matrix)
mirna_path = "srnaseq/final/files_publish"
mirna_matrix = read.csv(file.path(root_path, mirna_path, "uuo_model_mirna_log2_counts.tsv"), row.names = 1)
load(file.path(root_path, mirna_path, "ma.rda"))
mirna_clusters = read.csv(file.path(root_path, mirna_path, "go_terms_network.tsv"))
mirna_col = data.frame(row.names=colnames(mirna_matrix), samples=colnames(mirna_matrix)) %>% tidyr::separate(samples,into = c("group"), extra = "drop") %>% mutate(group=factor(group, levels=order_group))
rownames(mirna_col) = colnames(mirna_matrix)#mrna_cluster = annotate_df(mrna_clusters, "genes", 'mmusculus_gene_ensembl', "ensembl_gene_id", "gene_biotype")
mrna_keep = as.character(mrna_clusters$genes)
prot_keep = unique(c(intersect(mrna_keep, rownames(prot_matrix)), as.character(prot_clusters$genes)))
keep = intersect(mrna_keep, rownames(prot_matrix))
df = degMerge(list(mrna=mrna_matrix[keep,], prot=prot_matrix[keep,]),
list(mrna=mrna_clusters),
list(mrna=mrna_col, prot=prot_col),
summarize="group",
time="group", col=NULL,
scale=TRUE)Working with 129 genes
Working with 28 genes
Working with 6 genes
Working with 18 genes
Working with 75 genes
prot_merged = do.call(rbind, df)
prot_merged$name = convertIDs(as.character(prot_merged$gene), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(prot_merged) = paste0("prot_", names(prot_merged))
mi_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mirna_matrix)), colData= mirna_col)
gene_rse = SummarizedExperiment(assays=SimpleList(norm=as.matrix(mrna_matrix)), colData=mrna_col)
cor_tar = find_targets(mi_rse, gene_rse, ma)Dimmension of cor matrix: 121 5851
mapping = melt(cor_tar) %>% filter(value<0)
keep = intersect(mrna_keep, unique(mapping[,1]))
mirna_keep = as.character(unique(mapping[,2]))
df = degMerge(list(mrna=mrna_matrix[keep,],
mirna=mirna_matrix[mirna_keep,]),
list(mrna=mrna_clusters),
list(mrna=mrna_col, mirna=mirna_col),
summarize="group",
time="group", col=NULL,
scale=TRUE, mapping=list(mirna=mapping))Working with 29 genes
Working with 12 genes
Working with 18 genes
mirna_merged = do.call(rbind, df)
mirna_merged$name = convertIDs(as.character(mirna_merged$pair), "ENSEMBL", "SYMBOL", org.Mm.eg.db)
names(mirna_merged) = paste0("mirna_", names(mirna_merged))
merged = merge(mirna_merged[mirna_merged$mirna_cor < -.6,c(6,7,1:5)], prot_merged[prot_merged$prot_cor > .6,c(5,7,1:4)], by=1, all=TRUE)
merged_all = merge(mirna_merged[c(6,7,1:5)], prot_merged[c(5,7,1:4)], by=1, all=TRUE)
save_file(merged, "omics_filtered.csv", result_files)
save_file(merged_all, "omics.csv", result_files)
save_file(prot_merged, "prot_omics.csv", result_files)
save_file(mirna_merged, "mirna_omics.csv", result_files)(useful if replicating these results)
sessionInfo()R version 3.3.0 (2016-05-03)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux stretch/sid
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] parallel stats4 methods stats graphics grDevices utils
[8] datasets base
other attached packages:
[1] isomiRs_0.99.18 SummarizedExperiment_1.2.2
[3] GenomicRanges_1.24.0 GenomeInfoDb_1.8.1
[5] DiscriMiner_0.1-29 DEGreport_1.5.0
[7] quantreg_5.24 SparseM_1.7
[9] org.Mm.eg.db_3.3.0 AnnotationDbi_1.34.3
[11] IRanges_2.6.0 S4Vectors_0.10.1
[13] Biobase_2.32.0 BiocGenerics_0.18.0
[15] logging_0.7-103 gridExtra_2.2.1
[17] cluster_2.0.4 dplyr_0.4.3
[19] biomaRt_2.28.0 CHBUtils_0.1
[21] limma_3.28.5 reshape_0.8.5
[23] ggplot2_2.1.0 knitr_1.13
[25] myRfunctions_0.1 rmarkdown_0.9.6
[27] BiocInstaller_1.22.2
loaded via a namespace (and not attached):
[1] tidyr_0.4.1 edgeR_3.14.0 splines_3.3.0
[4] gtools_3.5.0 Formula_1.2-1 assertthat_0.1
[7] latticeExtra_0.6-28 yaml_2.1.13 RSQLite_1.0.0
[10] lattice_0.20-33 chron_2.3-47 digest_0.6.9
[13] RColorBrewer_1.1-2 XVector_0.12.0 colorspace_1.2-6
[16] htmltools_0.3.5 Matrix_1.2-6 plyr_1.8.3
[19] DESeq2_1.12.3 XML_3.98-1.4 genefilter_1.54.2
[22] zlibbioc_1.18.0 xtable_1.8-2 scales_0.4.0
[25] gdata_2.17.0 BiocParallel_1.6.2 MatrixModels_0.4-1
[28] annotate_1.50.0 lazyeval_0.1.10 nnet_7.3-12
[31] survival_2.39-4 magrittr_1.5 evaluate_0.9
[34] GGally_1.0.1 gplots_3.0.1 foreign_0.8-66
[37] tools_3.3.0 data.table_1.9.6 formatR_1.4
[40] stringr_1.0.0 locfit_1.5-9.1 munsell_0.4.3
[43] caTools_1.17.1 grid_3.3.0 RCurl_1.95-4.8
[46] labeling_0.3 bitops_1.0-6 codetools_0.2-14
[49] gtable_0.2.0 DBI_0.4-1 R6_2.1.2
[52] Nozzle.R1_1.1-1 Hmisc_3.17-4 KernSmooth_2.23-15
[55] stringi_1.1.1 Rcpp_0.12.5 geneplotter_1.50.0
[58] rpart_4.1-10 acepack_1.3-3.3 coda_0.18-1